# Map Polity scores onto positive indices usable as matrix subscripts.
#   polity: a score or a vector of scores
# Ordinary scores are shifted by 11 (e.g. -10 becomes 1 and 10 becomes 21);
# the special codes -66, -77 and -88 become 22, 23 and 24.
# Vectorised: every element is converted independently and NA stays NA.
polity2index <- function (polity) {
  index <- polity + 11
  # which() drops NA comparisons, so missing scores are left as NA
  index[which(polity == -66)] <- 22
  index[which(polity == -77)] <- 23
  index[which(polity == -88)] <- 24
  index
}

# Count transitions between consecutive states of a sequence.
#   l: vector of states used directly as matrix subscripts (1..n)
#   n: number of states, i.e. the dimension of the returned matrix
# Returns an n x n matrix whose [a, b] entry is the number of times state a
# is immediately followed by state b. Pairs containing NA are skipped, and
# sequences with fewer than two elements give an all-zero matrix.
transition.matrix <- function (l,n) {
  tm <- matrix(0,n,n)
  # seq_len() avoids the backwards ranges 1:0 and 1:-1 on short sequences
  for (i in seq_len(max(length(l)-1,0))) {
    from <- l[i]
    to <- l[i+1]
    if (is.na(from) || is.na(to))
      next
    tm[from,to] <- tm[from,to] + 1
  }
  tm
}

# Find changes of at least `d` between consecutive rows of one variable.
#   l:   table of observations (presumably one row per year — confirm)
#   var: column of `l` whose consecutive differences are examined
#   d:   minimum absolute difference that counts as a change
# Returns a data frame that puts the row before each change (argument tag
# `from`) next to the row after it (argument tag `to`). Differences that
# involve NA are not treated as changes. Tables with fewer than two rows
# give an empty result.
track.changes <- function (l,var,d=0.1) {
  # diff() returns an empty vector for short input, unlike 2:nrow(l)
  changed <- abs(diff(l[,var])) >= d
  changed <- !is.na(changed) & changed
  data.frame(from=l[c(changed,FALSE),],to=l[c(FALSE,changed),])
}

# Apply track.changes to every element of `data` and stack the results.
#   data: list of tables
#   var:  column whose changes are tracked
#   vars: columns kept from each table before tracking
#   ...:  forwarded to track.changes (e.g. the threshold `d`)
track.changes.all <- function (data,var,vars,...) {
  per.table <- lapply(data,function(tab) track.changes(tab[,vars],var,...))
  do.call("rbind",per.table)
}

# Pair each row that follows a change with the rows observed some number of
# rows earlier.
#   l:     table of observations in row order
#   var:   column of `l` whose consecutive differences are examined
#   d:     minimum absolute difference that counts as a change
#   years: vector of lags; for every lag k the row k positions before the
#          post-change row is returned, with ".y<k>" appended to its names
# Changes whose row index does not exceed the largest lag are dropped, so
# every lagged row exists. Tables with fewer than two rows give an empty
# result.
track.changes.history <- function (l,var,d=0.1,years=1) {
  # diff() returns an empty vector for short input, unlike 2:nrow(l)
  changed <- abs(diff(l[,var])) >= d
  changed <- !is.na(changed) & changed
  ind <- which(c(FALSE,changed))
  ind <- ind[ind>max(years)]
  from <- vector("list",length(years))
  for (i in seq_along(years)) {
    from[[i]] <- l[ind-years[i],]
    names(from[[i]]) <- paste(names(from[[i]]),".y",years[i],sep="")
  }
  data.frame(from=from,to=l[ind,])
}

# Apply track.changes.history to every element of `data` and stack the
# results.
#   data: list of tables
#   var:  column whose changes are tracked
#   vars: columns kept from each table before tracking
#   ...:  forwarded to track.changes.history (e.g. `d`, `years`)
track.changes.history.all <- function (data,var,vars,...) {
  per.table <- lapply(data,function(tab) track.changes.history(tab[,vars],var,...))
  do.call("rbind",per.table)
}

# Pair each row that precedes a change with the rows observed some number of
# rows later.
#   l:     table of observations in row order
#   var:   column of `l` whose consecutive differences are examined
#   d:     minimum absolute difference that counts as a change
#   years: vector of leads; for every lead k the row k positions after the
#          pre-change row is returned, with ".y<k>" appended to its names
# Tables with fewer than two rows give an empty result.
track.changes.future <- function (l,var,d=0.1,years=1) {
  # diff() returns an empty vector for short input, unlike 2:nrow(l)
  changed <- abs(diff(l[,var])) >= d
  changed <- !is.na(changed) & changed
  ind <- which(c(changed,FALSE))
  # NOTE(review): ind + max(years) stays inside `l` for any
  # ind <= nrow(l) - max(years); this bound is stricter and is kept as
  # written — confirm that dropping the latest changes is intended.
  ind <- ind[ind<length(changed)-max(years)]
  to <- vector("list",length(years))
  for (i in seq_along(years)) {
    to[[i]] <- l[ind+years[i],]
    names(to[[i]]) <- paste(names(to[[i]]),".y",years[i],sep="")
  }
  data.frame(from=l[ind,],to=to)
}

# Apply track.changes.future to every element of `data` and stack the
# results.
#   data: list of tables
#   var:  column whose changes are tracked
#   vars: columns kept from each table before tracking
#   ...:  forwarded to track.changes.future (e.g. `d`, `years`)
track.changes.future.all <- function (data,var,vars,...) {
  per.table <- lapply(data,function(tab) track.changes.future(tab[,vars],var,...))
  do.call("rbind",per.table)
}

# Plot the network of observed transitions between joint states of several
# variables, keeping only transitions towards a higher weighted score.
#   vars: names of the columns combined into one multi-digit state; each
#         value v contributes the digit floor(10 * v)
# NOTE(review): the list `data` is read from the enclosing environment, not
# from an argument — confirm it is defined before calling.
# NOTE(review): the digit decomposition, the weights c(2,1,1) and the vertex
# colours below are written for exactly three variables.
multdim.trans <- function (vars) {
  # positional multipliers 10^(k-1), ..., 10, 1 that merge digits into a number
  mult <- 10^rev(seq_along(vars)-1)
  nw <- Reduce('+',lapply(data,function(x) transition.matrix(colSums(mult*t(floor(10*x[,vars]))),10^length(vars))))
  
  # Weighted mean of the three digits of every state index, computed once per
  # state rather than once per matrix cell. The middle digit is taken with
  # %%10, matching the colour computation further down.
  digits.mean <- vapply(seq_len(nrow(nw)),function(s)
    weighted.mean(c(floor(s/100),floor(s/10)%%10,s%%10),c(2,1,1)),numeric(1))
  # [i, j] is TRUE when target state j scores strictly higher than source i
  polyarchy.incr <- outer(digits.mean,digits.mean,"<")
  nw[!polyarchy.incr] <- 0
  
  if (!require(igraph))
    stop("package 'igraph' is required by multdim.trans")
  g <- graph.adjacency(nw,weighted=T,diag=F)
  # keep only states with at least ten incident edges
  names <- which(degree(g)>=10)
  g <- delete.vertices(g,which(degree(g)<10))
  V(g)$name <- names
  # colour string built from the three digits of the state index
  V(g)$color <- paste("#",floor(names/100),0,floor(names/10)%%10,0,names%%10,0,sep="")
  plot.g <- plot.igraph(g,vertex.size=8,vertex.label.cex=0.7,vertex.label.color="white",vertex.frame.color=NA,edge.width=0.25*E(g)$weight,edge.arrow.size=0.2,layout=layout.reingold.tilford(g,circular=F),main=vars)
}

# Cross-tabulate the simultaneous steps of two state sequences.
#   l: two-column table; row i holds the state of both sequences at step i
#   n: number of states per sequence
# A step from state a to state b is encoded as n*(a-1)+b. The returned
# n^2 x n^2 matrix counts, over all pairs of consecutive rows without NA,
# how often a step in column 1 (row index) coincides with a step in
# column 2 (column index).
trans.trans.matrix <- function (l,n) {
  counts <- matrix(0,n^2,n^2)
  steps <- nrow(l)
  if (steps > 1) {
    for (s in 1:(steps-1)) {
      nxt <- s + 1
      complete <- !is.na(l[s,1]) & !is.na(l[nxt,1]) & !is.na(l[s,2]) & !is.na(l[nxt,2])
      if (complete) {
        step1 <- n*(l[s,1]-1) + l[nxt,1]
        step2 <- n*(l[s,2]-1) + l[nxt,2]
        counts[step1,step2] <- counts[step1,step2] + 1
      }
    }
  }
  counts
}

# Sum the joint step counts of two variables over all elements of `data`.
#   data:       list of tables
#   var1, var2: the two columns to cross-tabulate
# Values are shifted by one before counting, and the number of states is the
# largest observed value of either column plus one.
trans.trans <- function (data,var1,var2) {
  pair <- c(var1,var2)
  largest <- lapply(data,function(x) max(x[,pair],na.rm=TRUE))
  n <- Reduce(max,largest)
  counts <- lapply(data,function(x) trans.trans.matrix(x[,pair]+1,n+1))
  Reduce('+',counts)
}

# Same aggregation as trans.trans, but with the number of states fixed at
# five instead of being derived from the data.
#   data:       list of tables
#   var1, var2: the two columns to cross-tabulate (shifted by one)
trans.trans.5 <- function (data,var1,var2) {
  pair <- c(var1,var2)
  counts <- lapply(data,function(x) trans.trans.matrix(x[,pair]+1,5))
  Reduce('+',counts)
}

# Sum the per-sequence transition matrices from multdim.trans.matrix over
# all elements of `data`, ignoring elements for which it returns NULL.
#   data:       list of recoded state sequences
#   start, end: recoded states delimiting the stretches of interest
#   key:        recoding key
multdim.trans.cat <- function (data,start,end,key) {
  matrices <- lapply(data,function(x) multdim.trans.matrix(x,start,end,key))
  Reduce('+',Filter(function(m) !is.null(m),matrices))
}

# Build the transition-count matrix of one recoded state sequence,
# restricted to the stretches found by select.trans.
#   l:          vector of recoded states
#   start, end: recoded states delimiting the stretches
#   key:        recoding key; max(key) is the dimension of the matrix
# Side effect: appends one entry per stretch to the variable `paths` in an
# enclosing environment (`<<-`). Returns NULL when no stretch is found,
# because Reduce() over an empty list yields NULL.
multdim.trans.matrix <- function (l,start,end,key) {
  trans.i <- select.trans(l,start,end)
  #l <- l[select.trans(l,start,end)]
  # Record the states along each stretch that differ from their successor.
  # NOTE(review): the comparison mask has one element fewer than l[x], so R
  # recycles it when it is used as a subscript — confirm this is intended.
  paths <<- c(paths,lapply(trans.i,function(x)l[x][l[x[1:(length(x)-1)]]!=l[x[2:length(x)]]]))
  # One transition matrix per stretch, summed over all stretches.
  tm <- Reduce('+',Filter(Negate(is.null),lapply(trans.i,function(x)transition.matrix(l[x],max(key)))))
  #if (end == key[length(key)-2])
  #  tm[end,] <- 0 # remove transitions from democracy
  #else if (end == key[length(key)-3] & start != end)
  #  tm[end,] <- 0 # remove transitions from autocracy
  tm
}

# Locate the stretches of a state sequence that run from an occurrence of
# `start` to the next occurrence of `end`.
#   l:          vector of states
#   start, end: states delimiting a stretch
# For each position holding `start`, the first later position holding `end`
# is looked up, searching no further than the next `start` position (or one
# past the end of `l`). When start == end, stretches that contain the state
# start+1 are discarded.
# Returns a list of index ranges, or numeric() if `start` never occurs.
select.trans <- function (l,start,end) {
  from.pos <- which(l==start)
  n.from <- length(from.pos)
  if (n.from==0)
    return(numeric())
  # sentinel one past the end closes the search window of the last start
  bounds <- c(from.pos,length(l)+1)
  to.pos <- which(l==end)
  found <- list()
  for (k in 1:n.from) {
    window <- (bounds[k]+1):bounds[k+1]
    hits <- which(to.pos%in%window)
    if (length(hits)==0)
      next
    stretch <- bounds[k]:to.pos[hits[1]]
    if (start==end & (start+1)%in%l[stretch])
      next
    found[[length(found)+1]] <- stretch
  }
  #if (start==end)
  #  found <- unique(found)
  found
}

# Plot the network of transitions between joint states of several variables
# along the stretches leading from one end state to another.
#   data:  list of tables
#   vars:  columns combined into a joint state
#   npols: number of categories per variable, indexed by variable name
#   div:   edge counts are divided by this value and floored
#   thold: edge counts below this value are set to zero
#   na:    if FALSE, edges touching the last two key entries are removed
#   edge.arrow.size, vertex.scale: plotting parameters
#   start, end: offsets selecting the delimiting states as
#          key[length(key)-3+offset] (0 and 1 by default)
#   rm.first:   drop the first character of vertex labels that parse as
#          integers
#   trans.time: add the mean transition time to the plot subtitle
#   ...:   forwarded to plot()
# Side effects: assigns `paths`, `paths.t`, `mean.trans.time` and `nw` in an
# enclosing environment via `<<-`.
plot.mdt <- function (data,vars,npols,div=1,thold=0,na=T,edge.arrow.size=0.4,vertex.scale=1,start=0,end=1,rm.first=F,trans.time=F,...) {
  nvars <- length(vars)
  npols <- npols[vars]
  # NOTE(review): `n` is not used anywhere below.
  n <- max(npols)
  key <- recode.key(data,vars,npols)
  data.r <- recode.data(data,vars,npols,key)
  start.r <- key[length(key)-3+start]
  end.r <- key[length(key)-3+end]
  # Reset the shared accumulator that multdim.trans.matrix appends to.
  paths <<- list()
  nw <- multdim.trans.cat(data.r,start.r,end.r,key)
  trans.time.string <- ""
  nw[start.r,start.r] <- 0
  # Total transition count divided by the count leaving the start state.
  mean.trans.time <<- sum(nw) / sum(nw[start.r,])
  if (trans.time)
    trans.time.string <- paste("\nMean transition time:",round(mean.trans.time,2),"years")
  # Export the matrix before the diagonal, threshold and scaling are applied.
  nw <<- nw
  nw <- nw - diag(diag(nw))
  nw[nw<thold] <- 0
  nw <- floor(nw/div)
  if (!na) {
    nw[key[(length(key)-1):length(key)],] <- 0
    nw[,key[(length(key)-1):length(key)]] <- 0
  }
  # Keep non-empty paths and tabulate them as decoded strings, most frequent first.
  paths <<- paths[sapply(paths,function(x)length(x)>0)]
  paths.t <<- sort(table(unlist(lapply(paths,function(v)paste(sapply(v,function(x)decode.number(which(key==x),npols,length(key))),collapse=" ")))),T)
  require(igraph)
  g <- graph.adjacency(nw,weighted=T,diag=F)
  # Label each vertex with the decoded form of its key position.
  V(g)$name <- sapply(which(key>0),function(n)decode.number(n,npols,length(key)))
  if (rm.first)
    V(g)$name <- sapply(V(g)$name,function(s) if (!is.na(strtoi(s))) substring(s,2) else s)
  # Drop vertices without any edge.
  g <- delete.vertices(g,which(degree(g)<1))
  # Default colour, then special colours for the "A", "D", "x" and "na" vertices.
  V(g)$color <- "#91BAF2"; V(g)$color[which(V(g)$name=="A")] <- "#1239E6"; V(g)$color[which(V(g)$name=="D")] <- "#D43131"; V(g)$color[which(V(g)$name=="x")] <- "#888888"; V(g)$color[which(V(g)$name=="na")] <- "#888888"
  plot(g,vertex.size=4*nvars*vertex.scale,vertex.label.cex=0.7*vertex.scale,vertex.label.color="white",vertex.frame.color=NA,edge.width=0.25*E(g)$weight,edge.arrow.size=edge.arrow.size,sub=paste(c(vars,trans.time.string),collapse=" "),...)
}

# Recode every element of `data` into a sequence of joint-state codes.
#   data:  list of tables
#   vars:  columns combined into a joint state (values are shifted by one)
#   npols: number of categories per variable, indexed by variable name
#   key:   recoding key as produced by recode.key
recode.data <- function (data,vars,npols,key) {
  recode.one <- function(x) recode.data.country(x[,vars]+1,npols[vars],key)
  lapply(data,recode.one)
}

# Recode one table row by row and translate the raw codes through `key`.
#   l:     table whose rows are category vectors
#   npols: number of categories per column
#   key:   recoding key; its length is the number of potential codes
recode.data.country <- function (l,npols,key) {
  n.codes <- length(key)
  raw <- apply(l,1,function(v)recode.vector(v,npols,n.codes))
  key[raw]
}

# Build the key that maps potential joint-state codes to consecutive
# numbers for the codes that actually occur in `data`.
#   data:  list of tables
#   vars:  columns combined into a joint state (values are shifted by one)
#   npols: number of categories per variable, indexed by variable name
# Returns an integer vector with one entry per potential code: 0 for codes
# that never occur, otherwise the running number of the realised code.
recode.key <- function (data,vars,npols) {
  first <- vars[1]
  rest <- vars[2:length(vars)]
  # two categories of the first variable get codes of their own; four
  # special codes are appended at the end (see recode.vector)
  potential.pols <- prod(c(npols[first]-2,npols[rest])) + 4
  seen <- Reduce('|',lapply(data,function(x) recode.key.country(x[,vars]+1,npols,potential.pols)))
  key <- as.integer(seen)
  key[seen] <- seq_len(sum(seen))
  key
}

# Flag which potential joint-state codes occur in one table.
#   l:              table whose rows are category vectors
#   npols:          number of categories per column
#   potential.pols: number of potential codes
# Returns a logical vector of length potential.pols.
recode.key.country <- function (l,npols,potential.pols) {
  codes <- apply(l,1,function(x)recode.vector(x,npols,potential.pols))
  seen <- logical(potential.pols)
  seen[codes] <- TRUE
  seen
}

# Encode a vector of categories as a single joint-state code.
#   v:              categories, one per variable, starting at 1
#   npols:          number of categories per variable
#   potential.pols: total number of codes; the last four are special
# Special codes (labelled "x", "A", "D" and "na" by decode.number):
#   potential.pols-1  first variable is NA
#   potential.pols-3  first variable is in its lowest category
#   potential.pols-2  first variable is in its highest category
#   potential.pols    any other variable is NA
# Otherwise the first variable is shifted down by one and the vector is read
# as a mixed-radix number with bases npols. A single variable is supported.
recode.vector <- function (v,npols,potential.pols) {
  if (is.na(v[1]))
    return(potential.pols-1)
  if (v[1]==1)
    return(potential.pols-3)
  if (v[1]==npols[1])
    return(potential.pols-2)
  if (any(is.na(v)))
    return(potential.pols)
  v[1] <- v[1] - 1
  k <- length(v)
  value <- 0
  # seq_len() skips the loop for a single variable, where 1:(k-1) would
  # run backwards over 1:0 and produce NA
  for (i in seq_len(k-1))
    value <- value + (v[i]-1)*prod(npols[(i+1):k])
  value + v[k]
}

# Turn a joint-state code back into a printable label.
#   n:              position of the code among the potential codes
#   npols:          number of categories per variable
#   potential.pols: total number of codes; the last four are special
# The special codes potential.pols-3, -2, -1 and potential.pols are labelled
# "A", "D", "x" and "na". Any other code is decoded digit by digit into the
# concatenated categories; the first digit is shifted up by one because the
# lowest category of the first variable has a code of its own. A single
# variable is supported.
decode.number <- function (n,npols,potential.pols) {
  if (n==potential.pols-3)
    return("A")
  if (n==potential.pols-2)
    return("D")
  if (n==potential.pols-1)
    return("x")
  if (n==potential.pols)
    return("na")
  number <- ""
  remainder <- n
  npols[1] <- npols[1] - 2
  k <- length(npols)
  # seq_len() skips the loop for a single variable, where 1:(k-1) would
  # run backwards over 1:0
  for (i in seq_len(k-1)) {
    denom <- prod(npols[(i+1):k])
    div <- floor((remainder-1)/denom)
    number <- paste(number,div+1+(i==1),sep="")
    remainder <- remainder - div * denom
  }
  # with a single variable the last digit is also the first one, so the
  # first-variable shift is applied here
  paste(number,remainder+(k==1),sep="")
}

# Convenience wrapper around trans.times that takes 0/1 flags instead of
# category numbers.
#   data:  list of tables
#   npols: number of categories per variable, indexed by variable name
#   var:   variable whose transitions are timed
#   start: 1 selects the highest category as start state, anything else 1
#   end:   0 selects category 1 as end state, anything else the highest
#   na.rm: forwarded to trans.times
# When start equals end, `final` is set to the highest category; otherwise
# it equals the end state.
trans.times.2 <- function (data,npols,var,start,end,na.rm=F) {
  top <- npols[var]
  start.r <- if (start == 1) top else 1
  end.r <- if (end == 0) 1 else top
  final <- if (start == end) top else end.r
  trans.times(data,var,start.r,end.r,final,na.rm)
}

# Run trans.time on every element of `data` and stack the results.
#   data:       list of tables with columns `var`, "year", "country_name"
#   var:        variable whose transitions are timed
#   start, end: delimiting states; each is reduced by one before the call
#   final:      forwarded to trans.time unchanged
#   na.rm:      forwarded to trans.time
# NOTE(review): `final` is not reduced by one like `start` and `end` —
# confirm that this is intended.
trans.times <- function (data,var,start,end,final=end,na.rm=F) {
  cols <- c(var,"year","country_name")
  spells <- lapply(data,function(x) trans.time(x[,cols],start-1,end-1,final,na.rm))
  do.call("rbind",spells)
}

# Measure how long one table takes to get from state `start` to state `end`.
#   l:       table with the state in column 1, the year in column 2 and the
#            country name in column 3
#   start, end: delimiting states
#   final:   when start == end, spells containing this state are dropped
#   na.rm:   drop spells that contain a missing state
#   returnI: also return the row indices of each spell
# For every row holding `start`, the first later row holding `end` is looked
# up, searching no further than the next `start` row. When start == end only
# spells longer than one row step are kept.
# Returns numeric() if `start` never occurs, otherwise a data frame with one
# row per spell (country, start year, end year, length in rows).
trans.time <- function (l,start,end,final=end,na.rm=F,returnI=F) {
  state <- l[,1]
  from.rows <- which(state==start)
  n.from <- length(from.rows)
  if (n.from==0)
    return(numeric())
  # sentinel one past the last row closes the final search window
  bounds <- c(from.rows,nrow(l)+1)
  to.rows <- which(state==end)
  trans.years <- numeric()
  start.years <- integer()
  end.years <- integer()
  countries <- vector()
  start.i <- integer()
  end.i <- integer()
  for (k in 1:n.from) {
    first <- bounds[k]
    hits <- which(to.rows%in%(first+1):bounds[k+1])
    if (length(hits)==0)
      next
    last <- to.rows[hits[1]]
    spell <- state[first:last]
    if ((start==end & final%in%spell) | (na.rm & sum(is.na(spell)) > 0))
      next
    trans.years <- c(trans.years,last-first)
    start.years <- c(start.years,l[first,2])
    end.years <- c(end.years,l[last,2])
    countries <- c(countries,as.character(l[first,3]))
    start.i <- c(start.i,first)
    end.i <- c(end.i,last)
  }
  if (start==end) {
    long <- trans.years>1
    start.years <- start.years[long]
    end.years <- end.years[long]
    countries <- countries[long]
    start.i <- start.i[long]
    end.i <- end.i[long]
    trans.years <- trans.years[long]
  }
  result <- data.frame(country=countries,start=start.years,end=end.years,time=trans.years)
  if (returnI)
    result <- data.frame(country=countries,start=start.years,end=end.years,time=trans.years,start.i,end.i)
  result
}

# Convenience wrapper around trans.other.vars that takes 0/1 flags instead
# of category numbers.
#   data:  list of tables
#   npols: number of categories per variable, indexed by variable name
#   vars:  additional variables to attach to each spell
#   year:  row offset at which the additional variables are read
#   pre:   read them before the spell start (TRUE) or after its end (FALSE)
#   var:   variable whose transitions are timed
#   start: 1 selects the highest category as start state, anything else 1
#   end:   0 selects category 1 as end state, anything else the highest
#   na.rm: forwarded to trans.other.vars
# When start equals end, `final` is set to the highest category; otherwise
# it equals the end state.
trans.other.vars.2 <- function (data,npols,vars,year,pre=T,var,start,end,na.rm=F) {
  top <- npols[var]
  start.r <- if (start == 1) top else 1
  end.r <- if (end == 0) 1 else top
  final <- if (start == end) top else end.r
  trans.other.vars(data,vars,year,pre,var,start.r,end.r,final,na.rm)
}

# For every element of `data`, find the spells from `start` to `end` with
# trans.time and attach further variables with trans.add.vars; the results
# are stacked.
#   data:       list of tables with columns `var`, "year", "country_name"
#               and `vars`
#   vars:       additional variables to attach
#   year, pre:  forwarded to trans.add.vars
#   var:        variable whose transitions are timed
#   start, end: delimiting states; each is reduced by one before the call
#   final, na.rm: forwarded to trans.time
trans.other.vars <- function (data,vars,year,pre=T,var,start,end,final=end,na.rm=F) {
  cols <- c(var,"year","country_name")
  one.table <- function(x) {
    spells <- trans.time(x[,cols],start-1,end-1,final,na.rm,returnI=T)
    trans.add.vars(spells,x[,vars],vars,year,pre)
  }
  do.call("rbind",lapply(data,one.table))
}

# Collect the additional variables of every spell at several row offsets.
#   data:  list of tables
#   npos:  number of categories per variable, indexed by variable name
#   vars:  additional variables to attach to each spell
#   years: vector of row offsets; one block of rows is produced per offset
#   pre:   read the variables before the spell start (TRUE) or after its
#          end (FALSE)
#   var, start, end, na.rm: forwarded to trans.other.vars.2
# Returns the columns country, start, end, year and `vars`, ordered by
# country, spell start and offset.
trans.other.vars.mult.years <- function (data,npos,vars,years,pre=T,var,start,end,na.rm=F) {
  # Use the `npos` and `pre` arguments; the body previously read a global
  # `npols` and always passed pre=T.
  per.offset <- lapply(years,function(n)
    data.frame(trans.other.vars.2(data,npos,vars,n,pre=pre,var,start,end,na.rm),year=n))
  df <- do.call("rbind",per.offset)
  df <- df[,c("country","start","end","year",vars)]
  df[with(df,order(country,start,year)),]
}

# Attach the values of further variables to each spell.
#   trans.df: spells with row indices `start.i` and `end.i`
#   data:     table from which the variables are read
#   vars:     columns of `data` to attach
#   year:     row offset from the spell boundary
#   pre:      read `year` rows before the spell start (TRUE) or `year` rows
#             after the spell end (FALSE)
# Offsets that fall outside `data` leave NA. Returns NULL (invisibly) when
# there are no spells.
trans.add.vars <- function (trans.df,data,vars,year,pre) {
  if (length(trans.df) == 0 || nrow(trans.df) == 0)
    return(invisible(NULL))
  toAdd <- data.frame(matrix(nrow=nrow(trans.df),ncol=length(vars),dimnames=list(NULL,vars)))
  target <- if (pre) trans.df$start.i - year else trans.df$end.i + year
  inside <- target%in%(1:nrow(data))
  toAdd[inside,] <- data[target[inside],vars]
  data.frame(trans.df,toAdd)
}

# Cut year ranges out of a list of tables.
#   data:          list of tables with a `year` column
#   country.years: either a vector c(first, last) applied to every table, or
#                  a table with columns `start` and `end` whose row names,
#                  rounded down to an integer, name the element of `data`
#                  each row refers to
# Returns the list of cut-outs that have more than one row.
data.subset <- function (data,country.years) {
  if (is.vector(country.years))
    data.sub <- lapply(data,function(cdata) cdata[cdata$year%in%(country.years[1]:country.years[2]),])
  else {
    cy <- data.frame(country.years)
    # Row names come from `country.years` itself; the body previously read
    # an unrelated global `trans.years` here.
    ids <- vapply(floor(as.numeric(row.names(country.years))),toString,character(1))
    data.sub <- lapply(seq_len(nrow(cy)),function(i) {
      cdata <- data[[ids[i]]]
      cdata[cdata$year%in%(cy$start[i]:cy$end[i]),]
    })
    names(data.sub) <- row.names(cy)
  }
  data.sub[sapply(data.sub,nrow)>1]
}

# Shift spell boundaries to the window just before or just after each spell.
#   country.years: table with columns `start` and `end`
#   years:         width of the window
#   pre:           TRUE gives [start - years, start], FALSE gives
#                  [end, end + years]
# Returns `country.years` with `start` and `end` replaced.
surrounding.years <- function (country.years,years,pre=T) {
  if (pre) {
    anchor <- country.years$start
    country.years$end <- anchor
    country.years$start <- anchor - years
  } else {
    anchor <- country.years$end
    country.years$start <- anchor
    country.years$end <- anchor + years
  }
  country.years
}

# Rounded quantile of several variables within each level of another one.
#   data: table
#   var:  grouping column
#   vars: columns summarised within each level of `var`
#   q:    probability passed to quantile()
# Missing values are ignored. The result of sapply() has one column per
# element of `vars`.
quantile.by.value <- function (data,var,vars,q) {
  rounded.quantile <- function(x) round(quantile(x,q,na.rm=TRUE))
  by.level <- function(v) tapply(data[,v],as.factor(data[,var]),rounded.quantile)
  sapply(vars,by.level)
}

# Run quantile.by.value once per variable, using each variable in turn as
# the grouping column and all the others as summarised columns.
#   data: table
#   vars: variables to cycle through
#   q:    probability passed on to quantile.by.value
# Returns a list named by `vars`.
quantile.by.value.all <- function (data,vars,q) {
  one.grouping <- function(var) quantile.by.value(data,var,vars[vars!=var],q)
  setNames(lapply(vars,one.grouping),vars)
}

# Summarise, for every grouping variable in `ql`, how each summarised
# variable changes across the grouping levels.
#   ql:    list as returned by quantile.by.value.all
#   table: if TRUE, return one row per (grouping, summarised) pair;
#          otherwise return the list of per-grouping results
# Elements of `ql` for which nrow() is NULL are skipped. In the table form
# the pair names are obtained by splitting the unlisted names at ".", so
# variable names containing a dot are not supported.
quantile.by.value.all.changes <- function (ql,table=F) {
  usable <- sapply(sapply(ql,nrow),function(x)!is.null(x))
  changes <- lapply(ql[usable],quantile.by.value.changes)
  if (!table)
    return(changes)
  flat <- unlist(changes)
  vars.m <- matrix(unlist(sapply(names(flat),function(s)strsplit(s,".",T))),ncol=2,byrow=T)
  ncats <- unlist(sapply(ql,nrow))
  iv <- vars.m[,1]
  dv <- vars.m[,2]
  data.frame(iv=iv,dv=dv,changes=flat,iv.cats=ncats[iv],dv.cats=ncats[dv],row.names=1:nrow(vars.m))
}

# Count the increases down each column of a matrix.
#   q: matrix whose rows are ordered levels
# For a column whose successive differences are all non-negative and free of
# NA, the number of strictly positive differences is returned; any other
# column gives -1.
quantile.by.value.changes <- function (q) {
  count.increases <- function(col) {
    k <- length(col)
    steps <- col[2:k]-col[1:(k-1)]
    monotone <- sum(is.na(steps))==0&all(steps>=0)
    if (monotone) sum(steps>0) else -1
  }
  apply(q,2,count.increases)
}

# Tabulate the paths between joint states along the stretches leading from
# one end state to another, without plotting.
#   data:  list of tables
#   vars:  columns combined into a joint state
#   npols: number of categories per variable, indexed by variable name
#   div, thold, na: applied to the transition matrix `nw` only
#   start, end: offsets selecting the delimiting states as
#          key[length(key)-3+offset] (0 and 1 by default)
# Returns a table of decoded path strings, most frequent first.
# Side effect: resets the variable `paths` in an enclosing environment and
# lets multdim.trans.matrix append to it.
paths.mdt <- function (data,vars,npols,div=1,thold=0,na=T,start=0,end=1) {
  # NOTE(review): `nvars` and `n` are not used anywhere below.
  nvars <- length(vars)
  npols <- npols[vars]
  n <- max(npols)
  key <- recode.key(data,vars,npols)
  data.r <- recode.data(data,vars,npols,key)
  start.r <- key[length(key)-3+start]
  end.r <- key[length(key)-3+end]
  # Reset the shared accumulator that multdim.trans.matrix appends to.
  paths <<- list()
  # The call is needed for its side effect on `paths`; `nw` is processed
  # below but does not enter the returned value.
  nw <- multdim.trans.cat(data.r,start.r,end.r,key)
  nw[start.r,start.r] <- 0
  nw <- nw - diag(diag(nw))
  nw[nw<thold] <- 0
  nw <- floor(nw/div)
  if (!na) {
    nw[key[(length(key)-1):length(key)],] <- 0
    nw[,key[(length(key)-1):length(key)]] <- 0
  }
  # From here on `paths` is a local, filtered copy of the shared variable.
  paths <- paths[sapply(paths,function(x)length(x)>0)]
  paths.t <- sort(table(unlist(lapply(paths,function(v)paste(sapply(v,function(x)decode.number(which(key==x),npols,length(key))),collapse=" ")))),T)
  paths.t
}

# For every ordered pair of variables, record the frequency of the most
# common path whose label is longer than three characters.
#   data:  list of tables
#   vars:  variables to pair up
#   npols: number of categories per variable, indexed by variable name
#   ...:   forwarded to paths.mdt
# Returns a square matrix indexed by variable name; the diagonal stays NA.
compare.paths.bw.vars <- function (data,vars,npols,...) {
  n <- length(vars)
  m <- matrix(NA,n,n,dimnames=list(vars,vars))
  for (i in 1:n) {
    others <- (1:n)[-i]
    for (j in others) {
      paths.t <- paths.mdt(data,c(vars[i],vars[j]),npols,...)
      long <- which(nchar(names(paths.t))>3)
      m[i,j] <- paths.t[min(long)]
    }
  }
  m
}

# Flag the rows that start a new run of values.
#   data:  table with a `country_name` column, in row order
#   vars:  one or more numeric columns to compare between consecutive rows
#   na.rm: if TRUE, rows with a missing value in `vars` are never flagged
# Row k (k >= 2) is flagged when the previous row has a missing value, the
# country differs from the previous row, or any of `vars` differs from the
# previous row. The first row is always flagged.
# Returns a logical vector with one element per row of `data`.
unique.values <- function (data,vars,na.rm=T) {
  n <- nrow(data)
  if (n < 2)
    return(rep(TRUE,n))
  has.na <- function(df) apply(is.na(df),1,any)
  prev <- seq_len(n-1)
  curr <- prev + 1
  # drop=FALSE keeps a single variable as a one-column table, so one code
  # path serves any number of variables
  prev.vals <- data[prev,vars,drop=FALSE]
  curr.vals <- data[curr,vars,drop=FALSE]
  same.vals <- apply(curr.vals-prev.vals == 0,1,all)
  # All three terms have n-1 elements; the NA test refers to the previous
  # row, so a value following a gap counts as new.
  is.new <- has.na(prev.vals) |
            data$country_name[prev] != data$country_name[curr] |
            !same.vals
  if (na.rm)
    is.new[has.na(curr.vals)] <- FALSE
  c(TRUE,is.new)
}

# Cross-tabulate every unordered pair of variables.
#   data: table
#   vars: columns to pair up
# Returns a list of two-way tables named "<first>.<second>"; the list is
# empty when fewer than two variables are given.
freq.tables <- function (data,vars) {
  n <- length(vars)
  tl <- list()
  # nothing to pair up; 1:(n-1) would run backwards here
  if (n < 2)
    return(tl)
  for (i in seq_len(n-1))
    for (j in (i+1):n)
      tl[[paste(vars[i],vars[j],sep=".")]] <- table(data[,vars[i]],data[,vars[j]])
  tl
}

# Apply freq.off.diag to every table in a list.
#   tl:    named list of two-way tables, e.g. from freq.tables
#   down:  forwarded to freq.off.diag (share below rather than above the
#          diagonal)
#   table: if TRUE, return a data frame with the two variable names split
#          out of each list name at "."; otherwise the named vector of shares
freq.off.diags <- function (tl,down=T,table=T) {
  # `down` is forwarded here; it was previously accepted but never used
  freq <- sapply(tl,freq.off.diag,down=down)
  if (table) {
    names.m <- matrix(unlist(sapply(names(freq),function(s)strsplit(s,".",T))),ncol=2,byrow=T)
    data.frame(var1=names.m[,1],var2=names.m[,2],freq,row.names=1:length(freq))
  }
  else
    freq
}

# Share of the off-diagonal mass of a two-way table that lies on one side of
# the diagonal.
#   t:    two-way table or matrix
#   down: TRUE gives the share below the diagonal (row > column), FALSE the
#         share above it (row < column)
freq.off.diag <- function (t,down=T) {
  above <- sum(t[upper.tri(t)])
  below <- sum(t[lower.tri(t)])
  off.diag <- above + below
  if (down) below / off.diag else above / off.diag
}

# Write every element of a named list to its own sheet of one workbook.
#   l:    named list; the names are passed as the third argument of
#         write.xlsx
#   file: path of the workbook
#   ...:  forwarded to write.xlsx
# The first element is written with append=FALSE, all later ones with
# append=TRUE.
write.xlsx.list <- function (l,file,...) {
  require(xlsx)
  sheets <- names(l)
  for (i in seq_along(l)) {
    write.xlsx(l[[i]],file,sheets[i],append=(i>1),...)
  }
}

# Write every element of a named list to "<dir>/<name>.csv".
#   l:   named list of tables
#   dir: target directory (must exist)
#   ...: forwarded to write.csv
write.csv.list <- function (l,dir,...) {
  labels <- names(l)
  for (i in seq_along(l)) {
    target <- paste0(dir,"/",labels[i],".csv")
    write.csv(l[[i]],target,...)
  }
}

# Append every element of a named list to one text file, each preceded by a
# line holding its name.
#   l:         named list of tables
#   file:      target file; output is always appended
#   row.names, col.names, sep: passed to write.table for the table contents
write.tables <- function (l,file,row.names=F,col.names=F,sep=",") {
  labels <- names(l)
  for (i in seq_along(l)) {
    # heading line first, then the table itself
    write.table(labels[i],file,row.names=FALSE,col.names=FALSE,append=TRUE)
    write.table(l[[i]],file,sep=sep,row.names=row.names,col.names=col.names,append=TRUE)
  }
}

# Sum the transition matrices of one variable over all elements of `data`.
#   data:  list of tables
#   var:   column holding the state sequence
#   npols: number of categories per variable, indexed by variable name
trans.matrix <- function (data,var,npols) {
  per.table <- lapply(data,function(l)transition.matrix(l[,var],npols[var]))
  Reduce('+',per.table)
}

# Build the summed transition matrix of every variable in `vars`.
#   data:  list of tables
#   vars:  columns holding state sequences
#   npols: number of categories per variable, indexed by variable name
# Returns a list of matrices named by `vars`.
trans.matrices <- function (data,vars,npols) {
  setNames(lapply(vars,function(var)trans.matrix(data,var,npols)),vars)
}

# Collapse every row of a table into one string by concatenating its
# elements without a separator.
#   data: matrix or table
vectors.as.strings <- function (data) {
  glue.row <- function(v) paste(v,collapse="")
  apply(data,1,glue.row)
}

# For every country and variable, concatenate the values of the rows that
# unique.values flags as the start of a new run.
#   data:  table with columns `country_id`, `country_name` and `vars`
#   vars:  columns to collapse
#   na.rm: drop flagged rows that have a missing value in `vars`
# The result of the nested sapply() has one column per variable.
unique.strings <- function (data,vars,na.rm=T) {
  flagged <- unique.values(data,vars)
  data.unique <- data[flagged,c("country_id",vars)]
  if (na.rm) {
    complete <- complete.cases(data.unique[,vars])
    data.unique <- data.unique[complete,]
  }
  by.country <- split(data.unique,data.unique[,"country_id"])
  collapse.var <- function(var) sapply(by.country,function(l)paste(l[,var],collapse=""))
  sapply(vars,collapse.var)
}

# Scatter-plot two variables against each other and report how often the
# first exceeds the second.
#   data:   table with a `country_name` column
#   var1, var2: columns to compare
#   unique: if TRUE, keep only the rows that unique.values flags as the
#           start of a new run of (var1, var2)
#   alpha:  point transparency passed to qplot
# Rows with a missing value in either variable are dropped. Returns the
# share of rows with var1 > var2 among the rows where the two differ.
plot.and.count.freq <- function (data,var1,var2,unique=F,alpha=.3) {
  # The mask must come from the table being subset; the body previously
  # computed it from a global `data.all`.
  if (unique)
    data <- data[unique.values(data,c(var1,var2)),]
  data <- data[!is.na(data[,var1])&!is.na(data[,var2]),]
  v1 <- data[,var1]
  v2 <- data[,var2]
  require(ggplot2)
  print(qplot(v1,v2,alpha=I(alpha),xlab=var1,ylab=var2)+geom_abline(colour="white"))
  differ <- v1 != v2
  v1 <- v1[differ]
  v2 <- v2[differ]
  sum(v1>v2) / length(v1)
}

# Cut out, for every table, the rows covering each rise of a variable from
# at most `from` to at least `to`.
#   data: list of tables
#   var:  column to examine
#   from: upper bound of the starting level
#   to:   lower bound of the target level
#   pre, post: number of extra rows kept before the start and after the end
# For every row at or below `from`, the first later row at or above `to` is
# looked up, searching no further than the next row at or below `from`.
# Returns a list with one entry per table: the selected rows, or NULL when
# the table has no row at or below `from` or none at or above `to`.
subset.trans <- function (data,var,from,to,pre=0,post=0) {
  one.table <- function (cty) {
    n <- nrow(cty)
    # sentinel one past the last row closes the final search window
    less <- c(which(cty[,var]<=from),n+1)
    more <- which(cty[,var]>=to)
    if (!(length(less)>1&length(more)>0))
      return(NULL)
    start <- numeric()
    end <- numeric()
    for (i in 1:(length(less)-1)) {
      hits <- which(more%in%(less[i]+1):less[i+1])
      if (length(hits) > 0) {
        start <- c(start,less[i])
        end <- c(end,more[hits[1]])
      }
    }
    # widen each span by pre/post rows, clipped to the table, and merge
    spans <- Map(function(s,e) max(1,s-pre):min(n,e+post),start,end)
    ind <- Reduce(union,spans,numeric())
    cty[ind,]
  }
  lapply(data,one.table)
}
